library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.0.4 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.2 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# All relative paths used below (e.g. "Operetta/raw_data/...") are resolved
# against this project directory.
# NOTE(review): this is a machine-specific absolute path, so the script is not
# portable as written — consider an RStudio project or a configurable root.
setwd("/home/rguerillot/Documents/Travail/Abdou_project/Staph_infection_project/github_analysis/VANANZ_phenotypes")
# import cell count operetta data

#' Read one Operetta export and tag every row with its source file.
#'
#' @param flnm Path of the file to read; passed unchanged to `read_tsv()`.
#' @return The parsed table with an extra `filename` column holding `flnm`.
read_tsv_filename <- function(flnm) {
  # The first 9 lines are skipped before the header row is parsed.
  # NOTE(review): presumably an export preamble — confirm against a raw file.
  read_tsv(flnm, skip = 9, trim_ws = TRUE) %>%
    mutate(filename = flnm)
}
# Read every file below the plate folder whose name matches `pattern` and
# stack the resulting tables row-wise.
# NOTE(review): `pattern` is a regular expression, not a glob, so "*Spots" is
# not tied to the ".tsv" extension. The "Multiple files in zip" messages in the
# recorded output suggest non-TSV exports are read as well. Tightening it
# (e.g. "Spots\\.tsv$") would also remove the `X1` column that is dropped
# further down, so both places have to change together.
Spot_count.df <- list.files(
  path = "Operetta/raw_data/210921 THP1 n2/",
  pattern = "*Spots",
  full.names = TRUE,
  recursive = TRUE
) %>%
  map_df(read_tsv_filename)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## `Cell Type` = col_character(),
## Strain = col_character(),
## `Bounding Box` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
## ! Multiple files in zip: reading ''_rels/.rels''
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## X1 = col_character()
## )
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## `Cell Type` = col_character(),
## Strain = col_character(),
## `Bounding Box` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
## ! Multiple files in zip: reading ''[Content_Types].xml''
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## X1 = col_character()
## )
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## `Cell Type` = col_character(),
## Strain = col_character(),
## `Bounding Box` = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
## ! Multiple files in zip: reading ''[Content_Types].xml''
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## X1 = col_character()
## )
# Sanity check: list the distinct source files recorded in the `filename`
# column of the combined spot table.
unique(Spot_count.df$filename)
## [1] "Operetta/raw_data/210921 THP1 n2//210921_THP1_1.5h_Objects_Population - Spots.tsv"
## [2] "Operetta/raw_data/210921 THP1 n2//210921_THP1_24h_Objects_Population - Spots.tsv"
## [3] "Operetta/raw_data/210921 THP1 n2//210921_THP1_5h_Objects_Population - Spots.tsv"
# Tidy Operetta combined dataframe
#names(Spot_count.df) <- gsub(" ", "_", names(Spot_count.df))
#names(Spot_count.df) <- gsub("Cells_-_", "", names(Spot_count.df))
Spot_count_clean.df <- Spot_count.df %>%
  # The recorded file names split into five pieces on "/"; only the last one
  # (the file name itself) is kept, as `experiment_id`.
  separate(
    col = filename, sep = "/",
    into = c("a", "b", "c", "d", "experiment_id")
  ) %>%
  # The file name is split on "_" to extract plate date, cell type and
  # timepoint; `experiment_id` itself is kept.
  separate(
    col = experiment_id, sep = "_",
    into = c("plate_date", "cell_type", "timepoint", "e", "f"),
    remove = FALSE
  ) %>%
  # Drop all throw-away pieces (including `c`, as the cell-count pipeline
  # does). `X1` only exists when a non-TSV file was read, so it is dropped
  # only if present.
  select(-a, -b, -c, -d, -e, -f, -any_of("X1"), -Timepoint) %>%
  mutate(
    Row = as.character(Row),
    # Harmonise label spelling across plates.
    Strain = ifelse(
      Strain == "Non-infected",
      yes = "non-infected", no = as.character(Strain)
    ),
    `Cell Type` = ifelse(
      `Cell Type` == "THP1 Casp1-/-",
      yes = "THP1 casp1-/-", no = as.character(`Cell Type`)
    )
  )
# Lookup table translating the numeric plate row (stored as text, to match the
# `Row` column of the spot table) into its letter A-H.
# Built with tibble(): data_frame() is deprecated.
raw_to_ABC.df <- tibble(
  Row = as.character(1:8),
  row = LETTERS[1:8]
)
# Replace the numeric row by its letter, build the well label (e.g. "A01")
# and a per-sample identifier.
Spot_count_clean.df <- Spot_count_clean.df %>%
  # Join key stated explicitly instead of relying on the natural join.
  left_join(raw_to_ABC.df, by = "Row") %>%
  select(-Row) %>%
  select(Row = row, Column, everything()) %>%
  mutate(
    Column = str_pad(Column, 2, pad = "0"),
    Well = paste0(Row, Column)
  ) %>%
  select(Well, everything(), -Replicate) %>%
  mutate(
    sample_id = paste(experiment_id, Well, `Cell Type`, Strain, sep = "#")
  )
# create df of replicate
# One row per distinct well; wells sharing experiment, cell type and strain
# are numbered 1, 2, ... in order of appearance to give a replicate index.
sample_replicate_df <- Spot_count_clean.df %>%
  select(all_of(c("experiment_id", "Well", "Cell Type", "Strain"))) %>%
  distinct() %>%
  group_by(experiment_id, `Cell Type`, Strain) %>%
  mutate(replicate = seq_len(n())) %>%
  ungroup()
# merge with clean data
# Attach the replicate index to every spot, then fix the display order of
# timepoints, strains and cell types through explicit factor levels.
Spot_count_clean.df <- merge(
  Spot_count_clean.df,
  sample_replicate_df,
  by = c("experiment_id", "Well", "Cell Type", "Strain")
) %>%
  mutate(
    timepoint = factor(timepoint, levels = c("1.5h", "5h", "24h")),
    Strain = factor(Strain, levels = c("WT", "agrA", "non-infected")),
    `Cell Type` = factor(
      `Cell Type`,
      levels = c("THP1 (Cas9)", "THP1 casp1-/-", "THP1 casp-4/5 -/-")
    )
  )
# count bacteria per field
# Number of rows of the spot table per timepoint / well / field, reported as
# the number of bacteria per field (assumes one row per detected spot).
bac_count_field <- Spot_count_clean.df %>%
  group_by(timepoint, Well, `Cell Type`, Strain, replicate, Field) %>%
  count(name = "number of bacteria/field") %>%
  # Return an ungrouped table, like the per-field cell counts further down.
  ungroup()
ggplot(
  bac_count_field,
  aes(x = Strain, y = `number of bacteria/field`, fill = `Cell Type`)
) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = 0),
    alpha = 0.3
  ) +
  # geom_jitter(width = .2, alpha= .3) +
  facet_grid(~timepoint)

# count infected cells
# Per field, count the distinct cell identifiers that carry at least one spot.
bac_count_infected <- Spot_count_clean.df %>%
  group_by(timepoint, Well, `Cell Type`, Strain, replicate, Field) %>%
  distinct(`Spots - Object No in Cells`) %>%
  count(name = "number of infected cells/field") %>%
  # Return an ungrouped table, like the per-field cell counts further down.
  ungroup()
# count bacteria per cell
# Step 1: spots per cell within each field.
# Step 2: per field, how many cells carry 1, 2, 3, ... spots.
bac_count_cell <- Spot_count_clean.df %>%
  group_by(
    timepoint, Well, `Cell Type`, Strain, replicate, Field,
    `Spots - Object No in Cells`
  ) %>%
  count(name = "number of bacteria/infected cell") %>%
  ungroup() %>%
  group_by(
    timepoint, Well, `Cell Type`, Strain, replicate, Field,
    `number of bacteria/infected cell`
  ) %>%
  count(name = "number of infected cells") %>%
  # Return an ungrouped table, like the per-field cell counts further down.
  ungroup()
# Number of infected cells against bacterial load per cell, cell types side
# by side, strains in rows and timepoints in columns.
ggplot(
  bac_count_cell,
  aes(
    x = `number of bacteria/infected cell`,
    y = `number of infected cells`,
    fill = `Cell Type`,
    group = interaction(`Cell Type`, `number of bacteria/infected cell`)
  )
) +
  geom_boxplot(outlier.shape = NA, position = "dodge2") +
  # The stray empty argument (", ,") was removed: it only worked because it
  # was silently matched to `mapping`.
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = .15),
    alpha = 0.3,
    colour = "black"
  ) +
  facet_grid(Strain ~ timepoint) +
  # xlim() discards observations outside 0-7 (hence the "Removed ... rows"
  # warnings).
  xlim(0, 7)
## Warning: Removed 11 rows containing missing values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 2 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 2 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 15 rows containing missing values (geom_point).

# Same data with strains side by side, cell types in rows and timepoints in
# columns.
ggplot(
  bac_count_cell,
  aes(
    x = `number of bacteria/infected cell`,
    y = `number of infected cells`,
    fill = `Strain`,
    group = interaction(`Strain`, `number of bacteria/infected cell`)
  )
) +
  geom_boxplot(outlier.shape = NA, position = "dodge2") +
  # The stray empty argument (", ,") was removed: it only worked because it
  # was silently matched to `mapping`.
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = .15),
    alpha = 0.3,
    colour = "black"
  ) +
  facet_grid(`Cell Type` ~ timepoint) +
  # xlim() discards observations outside 0-7 (hence the "Removed ... rows"
  # warnings).
  xlim(0, 7)
## Warning: Removed 11 rows containing missing values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 19 rows containing missing values (geom_point).

# Plot as specified by Abdou

# Box plot of bacteria per field over time: one box per cell type with the
# jittered per-field counts on top, one panel per strain.
plot_bacteria_per_field <- function(counts) {
  ggplot(
    counts,
    aes(x = timepoint, y = `number of bacteria/field`, fill = `Cell Type`)
  ) +
    geom_boxplot(outlier.shape = NA) +
    geom_point(
      position = position_jitterdodge(jitter.width = .15, jitter.height = 0),
      alpha = 0.3
    ) +
    facet_grid(~Strain) +
    ylab("intracellular bacteria per field") +
    theme_bw()
}

# All cell types.
p1 <- plot_bacteria_per_field(bac_count_field)
p1

# Without the casp-4/5 knock-out.
p2 <- plot_bacteria_per_field(
  bac_count_field %>%
    filter(`Cell Type` != "THP1 casp-4/5 -/-")
)
p2

# Without the casp1 knock-out.
p3 <- plot_bacteria_per_field(
  bac_count_field %>%
    filter(`Cell Type` != "THP1 casp1-/-")
)
p3

# Box plot of the number of infected cells (y) against the number of bacteria
# per infected cell (x): one box per cell type and x value, strains in rows,
# timepoints in columns.
#
# counts      table with the columns produced for `bac_count_cell`.
# show_points overlay the jittered per-field counts (default TRUE).
plot_bacteria_per_cell <- function(counts, show_points = TRUE) {
  p <- ggplot(
    counts,
    aes(
      x = `number of bacteria/infected cell`,
      y = `number of infected cells`,
      fill = `Cell Type`,
      group = interaction(`Cell Type`, `number of bacteria/infected cell`)
    )
  ) +
    geom_boxplot(outlier.shape = NA, position = "dodge2")
  if (show_points) {
    p <- p +
      geom_point(
        position = position_jitterdodge(jitter.width = .15, jitter.height = .15),
        alpha = 0.3,
        colour = "black"
      )
  }
  p +
    facet_grid(Strain ~ timepoint) +
    theme_bw() +
    # x is a count, i.e. continuous: scale_x_discrete(limits = 1:10) raised
    # "Continuous limits supplied to discrete scale". Use one tick per integer
    # and zoom to 1-10 without discarding any observation.
    scale_x_continuous(breaks = 1:10) +
    coord_cartesian(xlim = c(0.5, 10.5))
}

# With points: all cell types, then without casp-4/5, then without casp1.
p4 <- plot_bacteria_per_cell(bac_count_cell)
p4

p5 <- plot_bacteria_per_cell(
  bac_count_cell %>%
    filter(`Cell Type` != "THP1 casp-4/5 -/-")
)
p5

p6 <- plot_bacteria_per_cell(
  bac_count_cell %>%
    filter(`Cell Type` != "THP1 casp1-/-")
)
p6

# idem without points
p7 <- plot_bacteria_per_cell(bac_count_cell, show_points = FALSE)
p7

p8 <- plot_bacteria_per_cell(
  bac_count_cell %>%
    filter(`Cell Type` != "THP1 casp-4/5 -/-"),
  show_points = FALSE
)
p8

p9 <- plot_bacteria_per_cell(
  bac_count_cell %>%
    filter(`Cell Type` != "THP1 casp1-/-"),
  show_points = FALSE
)
p9

# Plot total number of infected cells

# Box plot of infected cells per field over time: one box per cell type with
# the jittered per-field counts on top, one panel per strain.
plot_infected_cells_per_field <- function(counts) {
  ggplot(
    counts,
    aes(x = timepoint, y = `number of infected cells/field`, fill = `Cell Type`)
  ) +
    geom_boxplot(outlier.shape = NA) +
    geom_point(
      position = position_jitterdodge(jitter.width = .15, jitter.height = 0),
      alpha = 0.3
    ) +
    facet_grid(~Strain) +
    ylab("infected cells per field") +
    theme_bw()
}

# All cell types.
p10 <- plot_infected_cells_per_field(bac_count_infected)
p10

# Without the casp-4/5 knock-out.
p11 <- plot_infected_cells_per_field(
  bac_count_infected %>%
    filter(`Cell Type` != "THP1 casp-4/5 -/-")
)
p11

# Without the casp1 knock-out.
p12 <- plot_infected_cells_per_field(
  bac_count_infected %>%
    filter(`Cell Type` != "THP1 casp1-/-")
)
p12

# note: number of infected cells increases for agrA between 5h and 24h -> fewer seeded cells on the 5h plate?? => need to correct by total number of cells
# Plot total nb cells and pct of infected cells ----
# Read every file below the plate folder whose name matches `pattern` and
# stack the resulting tables row-wise.
# NOTE(review): `pattern` is a regular expression, not a glob, so "*Cells" is
# not tied to the ".tsv" extension. The "Multiple files in zip" messages in the
# recorded output suggest non-TSV exports are read as well. Tightening it
# would also remove the `X1` column that is dropped further down, so both
# places have to change together.
Cell_count.df <- list.files(
  path = "Operetta/raw_data/210921 THP1 n2/",
  pattern = "*Cells",
  full.names = TRUE,
  recursive = TRUE
) %>%
  map_df(read_tsv_filename)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## `Cell Type` = col_character(),
## Strain = col_character(),
## Replicate = col_double(),
## Row = col_double(),
## Column = col_double(),
## Plane = col_double(),
## Timepoint = col_double(),
## Field = col_double(),
## `Object No` = col_double(),
## X = col_double(),
## Y = col_double(),
## `Bounding Box` = col_character(),
## `Cells - Total Spot Area` = col_double(),
## `Cells - Relative Spot Intensity` = col_double(),
## `Cells - Number of Spots` = col_double(),
## `Cells - Number of Spots per Area of Cell` = col_double(),
## `Cells - Spots per Cell Mean` = col_double()
## )
##
## ! Multiple files in zip: reading ''[Content_Types].xml''
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## X1 = col_character()
## )
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## `Cell Type` = col_character(),
## Strain = col_character(),
## Replicate = col_double(),
## Row = col_double(),
## Column = col_double(),
## Plane = col_double(),
## Field = col_double(),
## `Object No` = col_double(),
## X = col_double(),
## Y = col_double(),
## `Bounding Box` = col_character(),
## `Cells - Total Spot Area` = col_double(),
## `Cells - Relative Spot Intensity` = col_double(),
## `Cells - Number of Spots` = col_double(),
## `Cells - Number of Spots per Area of Cell` = col_double(),
## `Cells - Spots per Cell Mean` = col_double()
## )
## ! Multiple files in zip: reading ''[Content_Types].xml''
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## X1 = col_character()
## )
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## `Cell Type` = col_character(),
## Strain = col_character(),
## Replicate = col_double(),
## Row = col_double(),
## Column = col_double(),
## Plane = col_double(),
## Field = col_double(),
## `Object No` = col_double(),
## X = col_double(),
## Y = col_double(),
## `Bounding Box` = col_character(),
## `Cells - Total Spot Area` = col_double(),
## `Cells - Relative Spot Intensity` = col_double(),
## `Cells - Number of Spots` = col_double(),
## `Cells - Number of Spots per Area of Cell` = col_double(),
## `Cells - Spots per Cell Mean` = col_double()
## )
## ! Multiple files in zip: reading ''[Content_Types].xml''
## Warning: Missing column names filled in: 'X1' [1]
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## X1 = col_character()
## )
# Sanity check: list the distinct source files recorded in the `filename`
# column of the combined cell table.
# The call used to inspect Spot_count.df (copy-paste slip), which is why the
# previously recorded output listed the "Spots" files.
unique(Cell_count.df$filename)
# Tidy the combined cell table the same way as the spot table and flag every
# cell as infected or not.
Cell_count_clean.df <- Cell_count.df %>%
  # Only the last path piece (the file name) is kept, as `experiment_id`.
  separate(
    col = filename, sep = "/",
    into = c("a", "b", "c", "d", "experiment_id")
  ) %>%
  separate(
    col = experiment_id, sep = "_",
    into = c("plate_date", "cell_type", "timepoint", "e", "f"),
    remove = FALSE
  ) %>%
  # `X1` only exists when a non-TSV file was read, so it is dropped only if
  # present.
  select(-a, -b, -c, -d, -e, -f, -any_of("X1"), -Timepoint) %>%
  mutate(
    Row = as.character(Row),
    # Harmonise label spelling across plates.
    Strain = ifelse(
      Strain == "Non-infected",
      yes = "non-infected", no = as.character(Strain)
    ),
    `Cell Type` = ifelse(
      `Cell Type` == "THP1 Casp1-/-",
      yes = "THP1 casp1-/-", no = as.character(`Cell Type`)
    ),
    # A cell counts as infected as soon as it carries at least one spot.
    Cells = ifelse(
      `Cells - Number of Spots` > 0,
      yes = "infected cells", no = "non-infected cells"
    )
  ) %>%
  # Explicit factor levels fix the display order in the plots.
  mutate(
    timepoint = factor(timepoint, levels = c("1.5h", "5h", "24h")),
    Strain = factor(Strain, levels = c("WT", "agrA", "non-infected")),
    `Cell Type` = factor(
      `Cell Type`,
      levels = c("THP1 (Cas9)", "THP1 casp1-/-", "THP1 casp-4/5 -/-")
    )
  )
# Columns that together identify one imaged field; shared by both counts and
# by the merge so the three steps cannot drift apart.
field_keys <- c(
  "timepoint", "Row", "Column", "Cell Type", "Strain", "Replicate", "Field"
)

# Number of cells (infected or not) per field.
total_cells_per_field <- Cell_count_clean.df %>%
  group_by(across(all_of(field_keys))) %>%
  count(name = "number of cells") %>%
  ungroup()

# Number of infected cells per field; fields without any infected cell are
# absent from this table.
infected_cells_per_field <- Cell_count_clean.df %>%
  filter(Cells == "infected cells") %>%
  group_by(across(all_of(field_keys))) %>%
  count(name = "number of infected cells") %>%
  ungroup()

# Keep every field (all.x = TRUE); fields missing from the infected table get
# 0 infected cells before the percentage is computed.
pct_infected_cells_per_field <- merge(
  total_cells_per_field,
  infected_cells_per_field,
  by = field_keys,
  all.x = TRUE
) %>%
  mutate(
    `number of infected cells` = ifelse(
      is.na(`number of infected cells`),
      yes = 0, no = `number of infected cells`
    ),
    `% of infected cells` = (`number of infected cells` / `number of cells`) * 100
  )
# Layers shared by all per-field plots below: one box per cell type with the
# jittered per-field values on top, one panel per strain.
per_field_layers <- list(
  geom_boxplot(outlier.shape = NA),
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = 0),
    alpha = 0.3
  ),
  facet_grid(~Strain),
  theme_bw()
)

# Subsets leaving out one knock-out line at a time.
pct_without_casp45 <- pct_infected_cells_per_field %>%
  filter(`Cell Type` != "THP1 casp-4/5 -/-")
pct_without_casp1 <- pct_infected_cells_per_field %>%
  filter(`Cell Type` != "THP1 casp1-/-")

# All cell types: total cells, infected cells, % infected.
p13 <- ggplot(
  pct_infected_cells_per_field,
  aes(x = timepoint, y = `number of cells`, fill = `Cell Type`)
) +
  per_field_layers
p13

p14 <- ggplot(
  pct_infected_cells_per_field,
  aes(x = timepoint, y = `number of infected cells`, fill = `Cell Type`)
) +
  per_field_layers
p14

p15 <- ggplot(
  pct_infected_cells_per_field,
  aes(x = timepoint, y = `% of infected cells`, fill = `Cell Type`)
) +
  per_field_layers
p15

# Without the casp-4/5 knock-out.
p16 <- ggplot(
  pct_without_casp45,
  aes(x = timepoint, y = `number of cells`, fill = `Cell Type`)
) +
  per_field_layers
p16

p17 <- ggplot(
  pct_without_casp45,
  aes(x = timepoint, y = `number of infected cells`, fill = `Cell Type`)
) +
  per_field_layers
p17

p18 <- ggplot(
  pct_without_casp45,
  aes(x = timepoint, y = `% of infected cells`, fill = `Cell Type`)
) +
  per_field_layers
p18

# Without the casp1 knock-out.
p19 <- ggplot(
  pct_without_casp1,
  aes(x = timepoint, y = `number of cells`, fill = `Cell Type`)
) +
  per_field_layers
p19

p20 <- ggplot(
  pct_without_casp1,
  aes(x = timepoint, y = `number of infected cells`, fill = `Cell Type`)
) +
  per_field_layers
p20

p21 <- ggplot(
  pct_without_casp1,
  aes(x = timepoint, y = `% of infected cells`, fill = `Cell Type`)
) +
  per_field_layers
p21

# check signals differences infected vs non-infected and try to remove background bact. counts in non-infected ----

# Stack every "Spots..." column of `spots` into long key/value form and label
# all resulting rows with `status`.
spots_to_long <- function(spots, status) {
  spots %>%
    select(starts_with("Spots")) %>%
    gather() %>%
    mutate(Strain = status)
}

# Spots from non-infected wells versus spots from any infected well.
ninf.df <- spots_to_long(
  filter(Spot_count_clean.df, Strain == "non-infected"),
  "non-infected"
)
inf.df <- spots_to_long(
  filter(Spot_count_clean.df, Strain != "non-infected"),
  "infected"
)

# Combine both groups and discard missing measurements.
ninf_inf.df <- rbind(ninf.df, inf.df) %>%
  filter(!is.na(value))

# Measurements available for comparison.
unique(ninf_inf.df$key)
## [1] "Spots - Relative Spot Intensity"
## [2] "Spots - Corrected Spot Intensity"
## [3] "Spots - Uncorrected Spot Peak Intensity"
## [4] "Spots - Spot Contrast"
## [5] "Spots - Spot Background Intensity"
## [6] "Spots - Spot Area [px²]"
## [7] "Spots - Region Intensity"
## [8] "Spots - Spot To Region Intensity"
## [9] "Spots - Object No in Cells"
## [10] "Spots - Spot Area [px²]"
# Density of every spot measurement, infected versus non-infected, one panel
# per measurement with independent axes.
spot_density_plot <- ggplot(ninf_inf.df, aes(x = value, colour = Strain)) +
  geom_density() +
  facet_wrap(~key, scales = "free")
spot_density_plot

# Same plot restricted to values between 0 and 100; values outside that range
# are discarded before the densities are estimated.
spot_density_plot +
  xlim(0, 100)
## Warning: Removed 18859 rows containing non-finite values (stat_density).

# Based on the distributions above, try to remove background spots by keeping
# only spots whose area is below 37.5 px² (larger spots are filtered out).
Spot_count_clean_clean.df <- Spot_count_clean.df %>%
  filter(`Spots - Spot Area [px²]` < 37.5)
# Count and re plot after filter
bac_count_field <- Spot_count_clean_clean.df %>%
  group_by(timepoint, Well, `Cell Type`, Strain, replicate, Field) %>%
  count(name = "number of bacteria/field") %>%
  # Return an ungrouped table, like the per-field cell counts above.
  ungroup()
ggplot(
  bac_count_field,
  aes(x = Strain, y = `number of bacteria/field`, fill = `Cell Type`)
) +
  geom_boxplot(outlier.shape = NA) +
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = 0),
    alpha = 0.3
  ) +
  # geom_jitter(width = .2, alpha= .3) +
  facet_grid(~timepoint)

# count bacteria per cell
# Step 1: spots per cell within each field (after the area filter).
# Step 2: per field, how many cells carry 1, 2, 3, ... spots (column `n`).
bac_count_cell <- Spot_count_clean_clean.df %>%
  group_by(
    timepoint, Well, `Cell Type`, Strain, replicate, Field,
    `Spots - Object No in Cells`
  ) %>%
  count(name = "number of bacteria/infected cell") %>%
  ungroup() %>%
  group_by(
    timepoint, Well, `Cell Type`, Strain, replicate, Field,
    `number of bacteria/infected cell`
  ) %>%
  count() %>%
  # Return an ungrouped table, like the per-field cell counts above.
  ungroup()
ggplot(
  bac_count_cell,
  aes(
    x = `number of bacteria/infected cell`,
    y = n,
    fill = `Cell Type`,
    group = interaction(`Cell Type`, `number of bacteria/infected cell`)
  )
) +
  geom_boxplot(outlier.shape = NA, position = "dodge2") +
  # The stray empty argument (", ,") was removed: it only worked because it
  # was silently matched to `mapping`.
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = .15),
    alpha = 0.3,
    colour = "black"
  ) +
  facet_grid(Strain ~ timepoint) +
  # xlim() discards observations outside 0-7 (hence the "Removed ... rows"
  # warnings).
  xlim(0, 7)
## Warning: Removed 4 rows containing missing values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_segment).
## Warning: Removed 5 rows containing missing values (geom_point).

# Same data with strains side by side, cell types in rows and timepoints in
# columns.
ggplot(
  bac_count_cell,
  aes(
    x = `number of bacteria/infected cell`,
    y = n,
    fill = `Strain`,
    group = interaction(`Strain`, `number of bacteria/infected cell`)
  )
) +
  geom_boxplot(outlier.shape = NA, position = "dodge2") +
  # The stray empty argument (", ,") was removed: it only worked because it
  # was silently matched to `mapping`.
  geom_point(
    position = position_jitterdodge(jitter.width = .15, jitter.height = .15),
    alpha = 0.3,
    colour = "black"
  ) +
  # The "+" after facet_grid() was missing, so xlim(0, 7) was evaluated as a
  # separate expression (printing a bare scale object) and never applied.
  facet_grid(`Cell Type` ~ timepoint) +
  xlim(0, 7)
# Note: doesn't work, removes most of the data -> need to filter background during image processing steps